Reading in the raw counts matrix and meta data, populating the infercnv object
infercnv_obj = CreateInfercnvObject(
raw_counts_matrix="oligodendroglioma_expression_downsampled.counts.matrix",
annotations_file="oligodendroglioma_annotations_downsampled.txt",
delim="\t",
gene_order_file="gencode_downsampled.txt",
ref_group_names=c("Microglia/Macrophage","Oligodendrocytes (non-malignant)"))
<<<<<<< HEAD
## INFO [2018-10-22 13:10:54] ::order_reduce:Start.
## INFO [2018-10-22 13:10:54] .order_reduce(): expr and order match.
## INFO [2018-10-22 13:10:54] ::process_data:order_reduce:Reduction from positional data, new dimensions (r,c) = 10338,184 Total=18322440.6799817 Min=0 Max=34215.
## INFO [2018-10-22 13:10:54] validating infercnv_obj
=======
## INFO [2018-10-31 13:35:02] ::order_reduce:Start.
## INFO [2018-10-31 13:35:02] .order_reduce(): expr and order match.
## INFO [2018-10-31 13:35:02] ::process_data:order_reduce:Reduction from positional data, new dimensions (r,c) = 10338,184 Total=18322440.6799817 Min=0 Max=34215.
## INFO [2018-10-31 13:35:02] validating infercnv_obj
>>>>>>> 29a0b973d2701fe5ea2834efcd6a82dd542e0308
Removing those genes that are very lowly expressed or present in very few cells
# filter out low expressed genes
cutoff=1
infercnv_obj <- require_above_min_mean_expr_cutoff(infercnv_obj, cutoff)
<<<<<<< HEAD
## INFO [2018-10-22 13:10:54] ::above_min_mean_expr_cutoff:Start
## INFO [2018-10-22 13:10:55] Removing 3025 genes from matrix as below mean expr threshold: 1
## INFO [2018-10-22 13:10:55] validating infercnv_obj
## INFO [2018-10-22 13:10:55] There are 7313 genes and 184 cells remaining in the expr matrix.
# filter out bad cells
min_cells_per_gene=3
infercnv_obj <- require_above_min_cells_ref(infercnv_obj, min_cells_per_gene=min_cells_per_gene)
## INFO [2018-10-22 13:10:55] Removed 117 genes having fewer than 3 min cells per gene = 1.59989 % genes removed here
## INFO [2018-10-22 13:10:55] validating infercnv_obj
=======
## INFO [2018-10-31 13:35:02] ::above_min_mean_expr_cutoff:Start
## INFO [2018-10-31 13:35:02] Removing 3025 genes from matrix as below mean expr threshold: 1
## INFO [2018-10-31 13:35:02] validating infercnv_obj
## INFO [2018-10-31 13:35:02] There are 7313 genes and 184 cells remaining in the expr matrix.
# filter out bad cells
min_cells_per_gene=3
infercnv_obj <- require_above_min_cells_ref(infercnv_obj, min_cells_per_gene=min_cells_per_gene)
## INFO [2018-10-31 13:35:02] Removed 117 genes having fewer than 3 min cells per gene = 1.59989 % genes removed here
## INFO [2018-10-31 13:35:02] validating infercnv_obj
>>>>>>> 29a0b973d2701fe5ea2834efcd6a82dd542e0308
## for safe keeping
infercnv_orig_filtered = infercnv_obj
#plot_mean_chr_expr_lineplot(infercnv_obj)
save('infercnv_obj', file = 'infercnv_obj.orig_filtered')
Perform a total sum normalization. Generates counts-per-million or counts-per-100k, depending on the overall sequencing depth.
infercnv_obj <- infercnv:::normalize_counts_by_seq_depth(infercnv_obj)
<<<<<<< HEAD
## INFO [2018-10-22 13:10:55] Computed total sum normalization factor as: 100000.000000
Suggested for removing noisy variation at low counts
=======## INFO [2018-10-31 13:35:03] Computed total sum normalization factor as: 100000.000000
Add ~0x and 2x variation to an artificial spike-in data set based on the normal cells so we can track and later scale residual expression data to this level of variation.
infercnv_obj <- spike_in_variation_chrs(infercnv_obj)
## INFO [2018-10-31 13:35:03] Selecting longest chrs for adding spike: chr1,chr2
## INFO [2018-10-31 13:35:03] processing group: malignant_MGH36
## INFO [2018-10-31 13:35:03] processing group: malignant_MGH53
## INFO [2018-10-31 13:35:03] processing group: malignant_93
## INFO [2018-10-31 13:35:03] processing group: malignant_97
## INFO [2018-10-31 13:35:04] processing group: Microglia/Macrophage
## INFO [2018-10-31 13:35:04] processing group: Oligodendrocytes (non-malignant)
## INFO [2018-10-31 13:35:22] validating infercnv_obj
Useful noise reduction method.
See: https://en.wikipedia.org/wiki/Anscombe_transform
infercnv_obj <- infercnv:::anscombe_transform(infercnv_obj)
save('infercnv_obj', file='infercnv_obj.anscombe')
infercnv_obj <- log2xplus1(infercnv_obj)
<<<<<<< HEAD
## INFO [2018-10-22 13:10:56] transforming log2xplus1()
=======
## INFO [2018-10-31 13:35:23] transforming log2xplus1()
>>>>>>> 29a0b973d2701fe5ea2834efcd6a82dd542e0308
save('infercnv_obj', file='infercnv_obj.log_transformed')
Here we define a threshold by taking the mean of the bounds of expression data across all cells. This is then use to define a cap for the bounds of all data.
threshold = mean(abs(get_average_bounds(infercnv_obj)))
infercnv_obj <- apply_max_threshold_bounds(infercnv_obj, threshold=threshold)
<<<<<<< HEAD
## INFO [2018-10-22 13:10:57] ::process_data:setting max centered expr, threshold set to: +/-: 4.33717214157707
=======
## INFO [2018-10-31 13:35:24] ::process_data:setting max centered expr, threshold set to: +/-: 4.29366139176392
>>>>>>> 29a0b973d2701fe5ea2834efcd6a82dd542e0308
plot_cnv(infercnv_obj,
output_filename='infercnv.logtransf',
x.range="auto",
title = "Before InferCNV (filtered & log2 transformed)",
color_safe_pal = FALSE,
x.center = mean(infercnv_obj@expr.data))
knitr::include_graphics("infercnv.logtransf.png")
<<<<<<< HEAD
The expression values are
infercnv_obj = smooth_by_chromosome(infercnv_obj, window_length=101, smooth_ends=TRUE)
<<<<<<< HEAD
## INFO [2018-10-22 13:11:14] ::smooth_window:Start.
## INFO [2018-10-22 13:11:14] ::smooth_window:Start.
## INFO [2018-10-22 13:11:15] ::smooth_window:Start.
## INFO [2018-10-22 13:11:15] ::smooth_window:Start.
## INFO [2018-10-22 13:11:15] ::smooth_window:Start.
## INFO [2018-10-22 13:11:16] ::smooth_window:Start.
## INFO [2018-10-22 13:11:16] ::smooth_window:Start.
## INFO [2018-10-22 13:11:16] ::smooth_window:Start.
## INFO [2018-10-22 13:11:17] ::smooth_window:Start.
## INFO [2018-10-22 13:11:17] ::smooth_window:Start.
## INFO [2018-10-22 13:11:17] ::smooth_window:Start.
## INFO [2018-10-22 13:11:17] ::smooth_window:Start.
## INFO [2018-10-22 13:11:18] ::smooth_window:Start.
## INFO [2018-10-22 13:11:18] ::smooth_window:Start.
## INFO [2018-10-22 13:11:18] ::smooth_window:Start.
## INFO [2018-10-22 13:11:18] ::smooth_window:Start.
## INFO [2018-10-22 13:11:19] ::smooth_window:Start.
## INFO [2018-10-22 13:11:19] ::smooth_window:Start.
## WARN [2018-10-22 13:11:19] window length exceeds number of rows in data
## WARN [2018-10-22 13:11:19] setting window length to nrows
## INFO [2018-10-22 13:11:19] ::smooth_window:Start.
## INFO [2018-10-22 13:11:20] ::smooth_window:Start.
## INFO [2018-10-22 13:11:20] ::smooth_window:Start.
## WARN [2018-10-22 13:11:20] window length exceeds number of rows in data
## WARN [2018-10-22 13:11:20] setting window length to nrows
## INFO [2018-10-22 13:11:20] ::smooth_window:Start.
## INFO [2018-10-22 13:11:20] ::smooth_window:Start.
## INFO [2018-10-22 13:11:20] ::smooth_window:Start.
## WARN [2018-10-22 13:11:20] window length exceeds number of rows in data
## WARN [2018-10-22 13:11:20] setting window length to nrows
=======
## INFO [2018-10-31 13:35:48] ::smooth_window:Start.
## INFO [2018-10-31 13:35:48] ::smooth_window:Start.
## INFO [2018-10-31 13:35:49] ::smooth_window:Start.
## INFO [2018-10-31 13:35:49] ::smooth_window:Start.
## INFO [2018-10-31 13:35:49] ::smooth_window:Start.
## INFO [2018-10-31 13:35:49] ::smooth_window:Start.
## INFO [2018-10-31 13:35:50] ::smooth_window:Start.
## INFO [2018-10-31 13:35:50] ::smooth_window:Start.
## INFO [2018-10-31 13:35:50] ::smooth_window:Start.
## INFO [2018-10-31 13:35:51] ::smooth_window:Start.
## INFO [2018-10-31 13:35:51] ::smooth_window:Start.
## INFO [2018-10-31 13:35:51] ::smooth_window:Start.
## INFO [2018-10-31 13:35:51] ::smooth_window:Start.
## INFO [2018-10-31 13:35:52] ::smooth_window:Start.
## INFO [2018-10-31 13:35:52] ::smooth_window:Start.
## INFO [2018-10-31 13:35:52] ::smooth_window:Start.
## INFO [2018-10-31 13:35:53] ::smooth_window:Start.
## INFO [2018-10-31 13:35:53] ::smooth_window:Start.
## INFO [2018-10-31 13:35:53] ::smooth_window:Start.
## INFO [2018-10-31 13:35:53] ::smooth_window:Start.
## INFO [2018-10-31 13:35:54] ::smooth_window:Start.
## INFO [2018-10-31 13:35:54] ::smooth_window:Start.
## INFO [2018-10-31 13:35:54] ::smooth_window:Start.
## INFO [2018-10-31 13:35:54] ::smooth_window:Start.
>>>>>>> 29a0b973d2701fe5ea2834efcd6a82dd542e0308
save('infercnv_obj', file='infercnv_obj.smooth_by_chr')
# re-center each cell
infercnv_obj <- center_cell_expr_across_chromosome(infercnv_obj, method = "median")
<<<<<<< HEAD
## INFO [2018-10-22 13:11:21] ::center_smooth across chromosomes per cell
=======
## INFO [2018-10-31 13:35:55] ::center_smooth across chromosomes per cell
>>>>>>> 29a0b973d2701fe5ea2834efcd6a82dd542e0308
save('infercnv_obj', file='infercnv_obj.cells_recentered')
plot_cnv(infercnv_obj,
output_filename='infercnv.chr_smoothed',
x.range="auto",
title = "chr smoothed and cells re-centered",
color_safe_pal = FALSE)
knitr::include_graphics("infercnv.chr_smoothed.png")
<<<<<<< HEAD